start_date <- "2017-01-01"
end_date <- "2019-12-31"
f1<-function(d2, d1){
n_weeks <- floor(as.numeric(difftime(d2, d1, units="weeks")))
}
f2<-function(d2, d1){
n_weeks <- floor(as.numeric(difftime(as.Date(d2)
, as.Date(d1), units = "weeks")))
}
m1<-microbenchmark(
Nocast = f1(end_date, start_date),
Cast = f2(end_date, start_date),
times = 1000
)
print(m1)
## Unit: microseconds
## expr min lq mean median uq max neval
## Nocast 310.970 318.805 341.0262 322.822 338.1215 3119.992 1000
## Cast 113.432 116.888 128.3325 118.451 124.6780 2886.206 1000
fig <- fbox_plot(m1, "microseconds")
fig
create_c <- function (n){
x <- c()
for (i in seq(n)) {
x <- c(x, i)
}
}
create_vector <- function (n){
x <- vector("integer", n)
for (i in seq(n)) {
x[i] <- i
}
}
m3 <- microbenchmark(
with_c = create_c(1e4),
with_vector = create_vector(1e4),
times = 10
)
print(m3)
## Unit: microseconds
## expr min lq mean median uq max neval
## with_c 66345.398 67647.458 70492.49 68257.341 72590.773 78978.512 10
## with_vector 333.302 338.582 629.66 358.629 374.308 3098.142 10
fig <- fbox_plot(m3, "microseconds")
fig
vector <- runif(1e8)
w1 <- function(x){
d <- length(which(x > .5))
}
w2 <- function(x){
d <- sum(x > .5)
}
m4 <- microbenchmark(
which = w1(vector),
nowhich = w2(vector),
times = 10
)
print(m4)
## Unit: milliseconds
## expr min lq mean median uq max neval
## which 628.7841 629.3828 650.4062 630.1411 630.5834 765.8051 10
## nowhich 217.9396 218.6715 228.2343 222.0552 222.6060 293.5930 10
fig <- fbox_plot(m4, "miliseconds")
fig
n <- 1e4
dt <- data.table(
a = seq(n), b = runif(n)
)
v1 <- function(dt){
d <- mean(dt[dt$b > .5, ]$a)
}
v2 <- function(dt){
d <- mean(dt$a[dt$b > .5])
}
m5 <- microbenchmark(
row_operation = v1(dt),
column_operation = v2(dt),
times = 10
)
print(m5)
## Unit: microseconds
## expr min lq mean median uq max neval
## row_operation 159.407 165.890 854.5208 173.0030 193.792 5028.604 10
## column_operation 61.795 68.478 287.6566 74.5845 80.109 2164.719 10
fig <- fbox_plot(m5, "microseconds")
fig
The function seq prevents when the second part of the 1:x is zero
num <- 1e7
s1 <- function(num){
d <- mean(1:num)
}
s2 <- function(num){
d <- mean(seq(num))
}
m6<-microbenchmark(
noseq = s1(num),
seq = s2(num),
times = 30
)
print(m6)
## Unit: milliseconds
## expr min lq mean median uq max neval
## noseq 69.85895 69.95165 73.70223 70.01365 70.26753 178.10112 30
## seq 69.89975 69.98960 70.12726 70.07465 70.15742 71.64522 30
fig <- fbox_plot(m6, "miliseconds")
fig
large_dataset <- data.table(
id = 1:1000000,
value = sample(letters, 1000000, replace = TRUE)
)
a1 <- function(x){
d <- x %>% mutate(code = paste0(id, "_", value))
}
a2 <- function(x){
d <- x %>% mutate(code = glue("{id}_{value}"))
}
m7 <- microbenchmark(
with_paste = a1(large_dataset),
with_glue = a2(large_dataset),
times = 20
)
print(m7)
## Unit: milliseconds
## expr min lq mean median uq max neval
## with_paste 562.4303 569.5832 574.0144 574.0746 578.4422 590.1323 20
## with_glue 574.4031 586.6158 612.3756 588.7625 594.0416 994.3517 20
fig <- fbox_plot(m7, "miliseconds")
fig
# Example data
data <- data.table(group = rep(seq(10), each = 100), value = rnorm(1000))
print(table(data$group))
##
## 1 2 3 4 5 6 7 8 9 10
## 100 100 100 100 100 100 100 100 100 100
# Using a for loop
for_loop_function <- function(data) {
res <- list()
unique_groups <- unique(data$group)
for(this_group in unique_groups) {
res[[this_group]] <- data %>% filter(group == this_group)
}
return(res)
}
sapply_function <- function(data){
unique_groups <- unique(data$group)
res <- list()
sapply(unique_groups, function(this_group){
res[[this_group]] <<- data %>% filter(group == this_group)
})
return(res)
}
m8 <- microbenchmark(
for_loop = for_loop_function(data),
sapply = sapply_function(data),
times = 500
)
print(m8)
## Unit: milliseconds
## expr min lq mean median uq max neval
## for_loop 6.595599 6.732975 6.993971 6.778680 6.844277 18.30714 500
## sapply 6.696777 6.797059 7.146713 6.843316 6.915480 21.96104 500
fig <- fbox_plot(m8, "miliseconds")
fig
## Unit: microseconds
## expr min lq mean median uq max neval
## Date 1485.282 1529.3690 1774.4006 1558.9740 1763.431 11102.088 200
## iDate 579.973 605.6105 682.8841 629.2845 652.152 2348.392 200
fig <- fbox_plot(m9, "miliseconds")
fig
switch_function <- function(x) {
switch(x,
"a" = "apple",
"b" = "banana",
"c" = "cherry",
"default")
}
case_when_function <- function(x) {
case_when(
x == "a" ~ "apple",
x == "b" ~ "banana",
x == "c" ~ "cherry",
TRUE ~ "default"
)
}
# Create a vector of test values
test_values <- sample(c("a", "b", "c", "d"), 1000, replace = TRUE)
m10 <- microbenchmark(
switch = sapply(test_values, switch_function),
case_when = sapply(test_values, case_when_function),
times = 200L
)
print(m10)
## Unit: microseconds
## expr min lq mean median uq
## switch 638.592 648.9255 673.1453 655.4935 666.5335
## case_when 220866.923 237389.9065 238339.8927 238634.8595 239802.0580
## max neval
## 2028.926 200
## 319802.888 200
fig <- fbox_plot(m10, "microseconds")
fig
set.seed(123)
n <- 1e6
data <- data.table(
id = seq(n),
value = sample(seq(100), n, replace = TRUE)
)
casewhenf <- function(data){
df <- data %>%
mutate(category = case_when(
value <= 20 ~ "Low",
value <= 70 ~ "Medium",
value > 70 ~ "High"))
}
fcasef <- function(data){
df <- data %>%
mutate(category = fcase(
value <= 20, "Low",
value <= 70, "Medium",
value > 70, "High"))
}
m11 <- microbenchmark(
case_when = casewhenf(data),
fcase = fcasef(data),
times = 20
)
print(m11)
## Unit: milliseconds
## expr min lq mean median uq max neval
## case_when 61.31269 61.55607 63.34938 61.75220 62.84578 74.78099 20
## fcase 21.92034 22.02272 22.87098 22.06671 22.24433 28.54184 20
fig <- fbox_plot(m11, "miliseconds")
fig
set.seed(123)
DT <- data.table(
ID = 1:1e6,
Value1 = sample(c(NA, 1:100), 1e6, replace = TRUE),
Value2 = sample(c(NA, 101:200), 1e6, replace = TRUE)
)
# Define the functions
replace_na_f <- function(data){
DF <- data %>%
mutate(Value1 = replace_na(Value1, 0),
Value2 = replace_na(Value2, 0)) %>%
as.data.table()
}
fcoalesce_f <- function(data){
DF <- data %>%
mutate(Value1 = fcoalesce(Value1, 0L),
Value2 = fcoalesce(Value2, 0L))
}
m12 <- microbenchmark(
treplace_na = replace_na_f(DT),
tfcoalesce = fcoalesce_f(DT),
times = 20
)
print(m12)
## Unit: milliseconds
## expr min lq mean median uq max neval
## treplace_na 7.360906 7.519588 16.552348 7.806227 9.930596 79.96057 20
## tfcoalesce 1.429508 1.510178 4.800182 1.834007 2.431323 57.94924 20
fig <- fbox_plot(m12, "miliseconds")
fig
dt <- data.table(field_name = c("argentina.blue.man.watch",
"brazil.red.woman.shoes",
"canada.green.kid.hat",
"denmark.red.man.shirt"))
# Filter rows where 'field_name' does not contain 'red'
dtnot <- function(data){
filtered_dt <- data |> _[!grepl("red", field_name)]
}
dplyrnot <- function(data){
filtered_dt <- data %>% filter(!grepl("red", field_name))
}
m13 <- microbenchmark(
tdtnot = dtnot(dt),
tdplyrnot = dplyrnot(dt),
times = 100
)
print(m13)
## Unit: microseconds
## expr min lq mean median uq max neval
## tdtnot 100.698 108.3625 141.5394 122.258 137.2610 1801.522 100
## tdplyrnot 661.805 681.8175 718.8038 692.502 706.6035 2649.315 100
fig <- fbox_plot(m13, "microseconds")
fig
large_data <- data.table(
id = 1:100000,
var1 = rnorm(100000),
var2 = rnorm(100000),
var3 = rnorm(100000),
var4 = rnorm(100000)
)
# Benchmarking
m14 <- microbenchmark(
tidyr_pivot_longer = {
long_data_tidyr <- pivot_longer(large_data, cols = starts_with("var"),
names_to = "variable", values_to = "value")
},
data_table_melt = {
long_data_dt <- melt(large_data, id.vars = "id", variable.name = "variable",
value.name = "value")
},
times = 10
)
print(m14)
## Unit: microseconds
## expr min lq mean median uq max
## tidyr_pivot_longer 6340.041 6411.875 8279.2962 6477.0520 6587.493 24507.444
## data_table_melt 417.288 487.590 524.2942 506.1345 553.162 693.364
## neval
## 10
## 10
fig <- fbox_plot(m14, "microseconds")
fig
vec1 <- seq(1000)
vec2 <- seq(1000)
# Define functions to be benchmarked
expand_grid_func <- function() {
return(expand_grid(vec1, vec2))
}
CJ_func <- function() {
return(CJ(vec1, vec2))
}
# Perform benchmarking
m15 <- microbenchmark(
expand_grid = expand_grid_func(),
CJ = CJ_func(),
times = 10
)
print(m15)
## Unit: microseconds
## expr min lq mean median uq max neval
## expand_grid 2188.494 2201.409 2406.253 2253.6860 2305.142 3390.257 10
## CJ 323.364 350.064 1762.423 412.8205 794.674 12474.379 10
fig <- fbox_plot(m15, "microseconds")
fig